# Working directory: set `data_dir` to the folder that holds the replication
# data, or leave it empty and start R from that folder. An empty path is
# skipped because `setwd("")` stops with "cannot change working directory".
# The workspace is not wiped with `rm(list = ls())` any more: run the script
# in a fresh R session so that sourcing it never deletes the caller's objects.
data_dir <- ""
if (nzchar(data_dir)) {
  setwd(data_dir)
}

#install.packages("ri")
#install.packages("randomizr")
#install.packages("AER")
#install.packages("nnet")
#install.packages("descr")
#install.packages("estimatr")
#install.packages("sandwich")
#install.packages("ggplot2")
#install.packages("ggrepel")

library(ri)
library(randomizr)
library(AER)
library(nnet)
library(descr)
library(estimatr)
library(sandwich)
library(ggplot2)
library(ggrepel)


# Penalise scientific notation so numbers are printed in fixed notation.
options(scipen = 999)

# Figure B1 ----

# Read the three cross-national data sets used for the figure.
tk <- read.csv(file = "quotas.csv")      # Tripp & Kang (2008) CPS
km <- read.csv(file = "sf99femdata.csv") # Kenworthy & Malami (1999) Social Forces
r <- read.csv(file = "ruedin.csv")       # Ruedin (2012)

# Tripp & Kang (2008) ----

# OLS of rep2006 on the quota indicator and the controls listed in the
# formula; fh01 enters as a factor.
out.tk <- lm(
  rep2006 ~ quota + nyrrun + as.factor(fh01) + prelect + edsec + gdp01pcln +
    other + cathdom + musdom + me + westeu + africa + asia + easteu +
    america + pacific,
  data = tk
)
summary(out.tk)

# Keep the in-sample predictions next to the raw data.
tk$pred <- predict(out.tk)

head(tk)

# Observed vs. predicted outcome per country, tagged with the source study.
TK <- data.frame(
  Actual = tk$rep2006,
  Predicted = tk$pred,
  Country = tk$country,
  Source = "Tripp & Kang (2008)"
)
head(TK)


# Kenworthy & Malami (1999) ----

# OLS of FEMLEGIS on the listed covariates, restricted to rows with
# FILT146 == 1. COUNTRY is subtracted in the formula, so it is not a
# regressor, but it is read back from the model frame below.
out.km <- lm(
  FEMLEGIS ~ ELECSYST + FEMSUFRG + DEMOC + MARXLEN + FEMEDUC + FEMLABOR +
    GDPPC + REL.CATH + REL.ISLM + REL.OTHR + RATIFIC + ABORTLEG +
    REG.AFRI + REG.MIDE + REG.ASIA + REG.LATA + REG.EEUR - COUNTRY,
  data = km,
  subset = c(FILT146 == 1)
)
summary(out.km)

# Work on the model frame (the rows actually used in the fit) and attach the
# predictions to it.
m.km <- model.frame(out.km)
m.km$pred <- predict(out.km)

# Observed vs. predicted outcome per country, tagged with the source study.
KM <- data.frame(
  Actual = m.km$FEMLEGIS,
  Predicted = m.km$pred,
  Country = m.km$COUNTRY,
  Source = "Kenworthy & Malami (1999)"
)
head(KM)



# Ruedin (2012) ----

# Multiply the outcome by 100 (run once per freshly loaded `r`; re-running
# this line would rescale it again).
r$women <- 100 * r$women

# Distribution of the outcome overall, within reg_EUR == 1, and the value for
# Switzerland.
quantile(r$women, probs = seq(0, 1, by = 0.05))
quantile(r$women[r$reg_EUR == 1], probs = seq(0, 1, by = 0.05))
r$women[r$Country == "Switzerland"]

# OLS of women on the listed covariates. Country is subtracted in the
# formula, so it is not a regressor, but it is read back from the model frame
# below.
out.r <- lm(
  women ~ el_sys_2 + quota_p + quota_s + age_dem + FH_PR +
    as.factor(region) - Country,
  data = r
)
summary(out.r)

# Rows used in the fit, with their predictions attached.
m.r <- model.frame(out.r)
m.r$pred <- predict(out.r)
head(m.r)

# Observed vs. predicted outcome per country, tagged with the source study.
R <- data.frame(
  Actual = m.r$women,
  Predicted = m.r$pred,
  Country = m.r$Country,
  Source = "Ruedin (2012)"
)
head(R)

# Stack the three actual-vs-predicted tables into one data frame for plotting.
CH <- rbind(KM, TK, R)
head(CH)

# Figure B1: actual vs. predicted outcome, one panel per source study, with
# Switzerland drawn as a filled, labelled point and all other countries as
# open circles; the dashed line is the 45-degree line (actual == predicted).
pdf(file = "swiss-case-portrait.pdf", paper = "special", width = 4, height = 8)
swiss_case_plot <- ggplot(data = CH, aes(x = Predicted, y = Actual)) +
  facet_wrap(~ Source, ncol = 1) +
  geom_point(data = subset(CH, Country == "Switzerland"), size = 3) +
  geom_text_repel(
    data = subset(CH, Country == "Switzerland"),
    aes(label = Country),
    min.segment.length = unit(0, "lines"),
    segment.size = 0.25,
    nudge_x = 10,
    nudge_y = -15
  ) +
  geom_point(
    data = subset(CH, Country != "Switzerland"),
    shape = 21, color = "black", fill = "white", size = 2
  ) +
  geom_abline(slope = 1, intercept = 0, lty = 2) +
  labs(
    title = "Women's political representation:\nSwitzerland is a \"typical\" case",
    y = "Actual % of women in parliament",
    x = "Predicted % of women in parliament"
  )
# A ggplot object is only drawn when it is printed. Top-level auto-printing
# does not happen under source(), which would leave the PDF empty, so print
# explicitly.
print(swiss_case_plot)
dev.off()



#Open dataset

dataset_complete_analysis <- readRDS("dataset_complete_analysis.rds")


#Table D2
# NOTE(review): the object built below is called Tab_D1 and is printed under a
# "Table D1" comment - confirm which table number is intended.

#Comply versus non-Comply

# Rows with a non-missing Treatment, restricted to the variables used in the
# compliance comparison (including the *_mis flags).
fieldexperiment <- subset(
  dataset_complete_analysis,
  !is.na(Treatment),
  select = c(
    Treatment, Block, Comply, Answer_invite, Application,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_3, edu_father_2, edu_father_1,
    edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_6, division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male,
    pol_interest_mis, knowledge_female_mis, knowledge_male_mis,
    running_for_office_mis
  )
)

# Empty results table: one row per covariate, then mean / SD / N for the
# first group, mean / SD / N for the second group, the difference in means
# and its p-value. All statistic cells start as NA and are filled in below.
Tab_D1 <- data.frame(
  Covariate = c(
    "Political interest",
    "Knowledge male candidate",
    "Knowledge female candidate",
    "Political ambition"
  )
)

for (stat_col in c("meancomply", "sdcomply", "ncomply",
                   "meannoncomply", "sdnoncomply", "nnoncomply",
                   "dim", "pvalue")) {
  Tab_D1[[stat_col]] <- NA
}

# Display headers; "Mean", "SD" and "N" deliberately appear twice (once per
# group), so later code addresses the cells by position.
colnames(Tab_D1) <- c(
  "Covariate",
  "Mean", "SD", "N",
  "Mean", "SD", "N",
  "DiM", "P-Value"
)
dim(Tab_D1)
Tab_D1


#Political interest

# Analysis flag: 0 where pol_interest_mis is 0, NA where it is 1. Rows whose
# flag is NA are dropped from the comparison sample.
fieldexperiment$W1_pol_interest_mis[fieldexperiment$pol_interest_mis == 0] <- 0
fieldexperiment$W1_pol_interest_mis[fieldexperiment$pol_interest_mis == 1] <- NA

data_W1_pol_interest <- subset(fieldexperiment, !is.na(W1_pol_interest_mis))

# Unrounded group statistics (Comply == 1 vs. Comply == 0).
mean1 <- with(data_W1_pol_interest, mean(pol_interest[Comply == 1]))
mean2 <- with(data_W1_pol_interest, mean(pol_interest[Comply == 0]))
sd1 <- with(data_W1_pol_interest, sd(pol_interest[Comply == 1]))
sd2 <- with(data_W1_pol_interest, sd(pol_interest[Comply == 0]))

# Group sizes: row totals of the Comply x outcome cross-tab (first row ->
# column 7, second row -> column 4).
tab1 <- table(data_W1_pol_interest$Comply, data_W1_pol_interest$pol_interest)
n_by_comply <- margin.table(tab1, 1)

# p-value of the Comply coefficient (second row, fourth column of the
# lm_robust coefficient table).
fit_pol_interest <- lm_robust(pol_interest ~ Comply, data = data_W1_pol_interest)

Tab_D1[1, 2] <- round(mean1, digits = 2)
Tab_D1[1, 3] <- round(sd1, digits = 2)
Tab_D1[1, 4] <- n_by_comply[2]
Tab_D1[1, 5] <- round(mean2, digits = 2)
Tab_D1[1, 6] <- round(sd2, digits = 2)
Tab_D1[1, 7] <- n_by_comply[1]
Tab_D1[1, 8] <- round(mean1 - mean2, digits = 2)
Tab_D1[1, 9] <- round(summary(fit_pol_interest)$coefficients[2, 4], digits = 2)


#Know male candidates

# Analysis flag: 1 by default, NA where knowledge_male_mis is 1.
fieldexperiment$W1_knowledge_male_mis <- 1
fieldexperiment$W1_knowledge_male_mis[fieldexperiment$knowledge_male_mis == 1] <- NA

# Drop the flagged rows. The filter must use the W1_ flag built just above
# (as the political-interest and running-for-office rows do); filtering on
# the raw knowledge_male_mis column keeps every row whose raw flag is not NA,
# including the ones marked 1.
data_W1_knowledge_male <- subset(fieldexperiment, !is.na(W1_knowledge_male_mis))

# Unrounded group statistics (Comply == 1 vs. Comply == 0).
mean1 <- mean(data_W1_knowledge_male$W1_knowledge_male[data_W1_knowledge_male$Comply == 1])
mean2 <- mean(data_W1_knowledge_male$W1_knowledge_male[data_W1_knowledge_male$Comply == 0])
sd1 <- sd(data_W1_knowledge_male$W1_knowledge_male[data_W1_knowledge_male$Comply == 1])
sd2 <- sd(data_W1_knowledge_male$W1_knowledge_male[data_W1_knowledge_male$Comply == 0])

Tab_D1[2, 2] <- round(mean1, digits = 2)
Tab_D1[2, 5] <- round(mean2, digits = 2)
Tab_D1[2, 8] <- round(mean1 - mean2, digits = 2)

# Group sizes: row totals of the Comply x outcome cross-tab (first row ->
# column 7, second row -> column 4).
tab1 <- table(data_W1_knowledge_male$Comply, data_W1_knowledge_male$W1_knowledge_male)

Tab_D1[2, 7] <- margin.table(tab1, 1)[1]
Tab_D1[2, 4] <- margin.table(tab1, 1)[2]

# p-value of the Comply coefficient from the robust regression.
Tab_D1[2, 9] <- round(summary(lm_robust(data_W1_knowledge_male$W1_knowledge_male ~ data_W1_knowledge_male$Comply))$coefficients[2, 4], digits = 2)

Tab_D1[2, 3] <- round(sd1, digits = 2)
Tab_D1[2, 6] <- round(sd2, digits = 2)

#Know female candidates

# Analysis flag: 1 by default, NA where knowledge_female_mis is 1.
fieldexperiment$W1_knowledge_female_mis <- 1
fieldexperiment$W1_knowledge_female_mis[fieldexperiment$knowledge_female_mis == 1] <- NA

# Drop the flagged rows. The filter must use the W1_ flag built just above
# (as the political-interest and running-for-office rows do); filtering on
# the raw knowledge_female_mis column keeps every row whose raw flag is not
# NA, including the ones marked 1.
data_W1_knowledge_female <- subset(fieldexperiment, !is.na(W1_knowledge_female_mis))

# Unrounded group statistics (Comply == 1 vs. Comply == 0).
mean1 <- mean(data_W1_knowledge_female$W1_knowledge_female[data_W1_knowledge_female$Comply == 1])
mean2 <- mean(data_W1_knowledge_female$W1_knowledge_female[data_W1_knowledge_female$Comply == 0])
sd1 <- sd(data_W1_knowledge_female$W1_knowledge_female[data_W1_knowledge_female$Comply == 1])
sd2 <- sd(data_W1_knowledge_female$W1_knowledge_female[data_W1_knowledge_female$Comply == 0])

Tab_D1[3, 2] <- round(mean1, digits = 2)
Tab_D1[3, 5] <- round(mean2, digits = 2)
Tab_D1[3, 8] <- round(mean1 - mean2, digits = 2)

# Group sizes: row totals of the Comply x outcome cross-tab (first row ->
# column 7, second row -> column 4).
tab1 <- table(data_W1_knowledge_female$Comply, data_W1_knowledge_female$W1_knowledge_female)

Tab_D1[3, 7] <- margin.table(tab1, 1)[1]
Tab_D1[3, 4] <- margin.table(tab1, 1)[2]

# p-value of the Comply coefficient from the robust regression.
Tab_D1[3, 9] <- round(summary(lm_robust(data_W1_knowledge_female$W1_knowledge_female ~ data_W1_knowledge_female$Comply))$coefficients[2, 4], digits = 2)

Tab_D1[3, 3] <- round(sd1, digits = 2)
Tab_D1[3, 6] <- round(sd2, digits = 2)


#Running for office

# Analysis flag: 1 by default, NA where running_for_office_mis is 1. Rows
# whose flag is NA are dropped from the comparison sample.
fieldexperiment$W1_running_for_office_mis <- 1
fieldexperiment$W1_running_for_office_mis[fieldexperiment$running_for_office_mis == 1] <- NA

data_W1_running_for_office <- subset(fieldexperiment, !is.na(W1_running_for_office_mis))

# Unrounded group statistics (Comply == 1 vs. Comply == 0).
mean1 <- with(data_W1_running_for_office, mean(running_for_office[Comply == 1]))
mean2 <- with(data_W1_running_for_office, mean(running_for_office[Comply == 0]))
sd1 <- with(data_W1_running_for_office, sd(running_for_office[Comply == 1]))
sd2 <- with(data_W1_running_for_office, sd(running_for_office[Comply == 0]))

# Group sizes: row totals of the Comply x outcome cross-tab (first row ->
# column 7, second row -> column 4).
tab1 <- table(data_W1_running_for_office$Comply, data_W1_running_for_office$running_for_office)
n_by_comply <- margin.table(tab1, 1)

# p-value of the Comply coefficient (second row, fourth column of the
# lm_robust coefficient table).
fit_running_for_office <- lm_robust(running_for_office ~ Comply, data = data_W1_running_for_office)

Tab_D1[4, 2] <- round(mean1, digits = 2)
Tab_D1[4, 3] <- round(sd1, digits = 2)
Tab_D1[4, 4] <- n_by_comply[2]
Tab_D1[4, 5] <- round(mean2, digits = 2)
Tab_D1[4, 6] <- round(sd2, digits = 2)
Tab_D1[4, 7] <- n_by_comply[1]
Tab_D1[4, 8] <- round(mean1 - mean2, digits = 2)
Tab_D1[4, 9] <- round(summary(fit_running_for_office)$coefficients[2, 4], digits = 2)

#Table D1
Tab_D1


#Table E1

#Balance Table

# Empty balance table: one row per covariate label (40 in total) and three
# NA columns that are filled in row by row further down. The row order here
# fixes the row indices used when the table is filled.
tab_balance <- data.frame(
  Covariate = c(
    "Knowledge male candidate", "Knowledge female candidate",
    "Political knowledge self-report", "Political ambition",
    "Political interest", "Economic interest",
    "Social skills", "Presentation skills", "Networking skills",
    "Gender attitudes - children", "Gender attitudes - childcare",
    "Gender attitudes - career", "Gender attitudes - elections",
    "Gender attitudes - work", "Gender attitudes - politics",
    "Perceptions about future", "Career goals", "Career entry", "Child wish",
    "Age", "Married", "Swiss citizenship", "Year entered uni",
    "Father employed", "Mother employed",
    "Father edu cat 1", "Father edu cat 2", "Father edu cat 3",
    "Mother edu cat 1", "Mother edu cat 2", "Mother edu cat 3",
    "Doing BA degree", "Doing MA degree", "Doing PhD",
    "Faculty of Natural Sciences", "Faculty of Medicine",
    "Faculty of Philosophy", "Faculty of Law",
    "Faculty of Veterinary Medicine", "Faculty of Economics"
  )
)

for (stat_col in c("meantr", "meanc", "p")) {
  tab_balance[[stat_col]] <- NA
}

colnames(tab_balance) <- c("Covariate", "Mean Treatment", "Mean Control", "P-Value")
dim(tab_balance)



# Rows with a non-missing Treatment, restricted to the design variables and
# the covariates reported in the balance table.
fieldexperiment <- subset(
  dataset_complete_analysis,
  !is.na(Treatment),
  select = c(
    Treatment, Block, Comply, Application,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_3, edu_father_2, edu_father_1,
    edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_6, division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male
  )
)

set.seed(1234567)

# Observed assignment and blocking variable.
Z <- fieldexperiment$Treatment
Block_var <- fieldexperiment$Block

# Units per condition within each block: one row per block, one column per
# condition.
block_m_each <- rbind(
  c(190, 377),
  c(5, 10),
  c(12, 18)
)

# Declare the blocked design and look up, for every unit, the probability of
# the condition it was actually assigned to.
Blocked_design <- declare_ra(blocks = Block_var, block_m_each = block_m_each)
Z_obs <- factor(Z, levels = 0:1, labels = c(0, 1))
prob_obs <- obtain_condition_probabilities(Blocked_design, Z_obs)

# Fill Table E1: for every covariate, the inverse-probability-weighted mean in
# each assignment group and the p-value of the Treatment coefficient from a
# weighted lm_robust regression of the covariate on Treatment.
#
# The covariate names are listed in the row order of `tab_balance`
# (knowledge items, interest/skills, gender attitudes, career items,
# demographics, parental employment/education, degree type, faculty).
balance_covariates <- c(
  "W1_knowledge_male", "W1_knowledge_female", "pol_knowledge_self_report",
  "running_for_office", "pol_interest", "econ_interest",
  "social_skills", "presentation_skills", "networking_skills",
  "gender_children", "gender_childcare", "gender_career",
  "gender_election", "gender_work", "gender_politics",
  "future_per", "career_goals_1", "career_entry_1", "children_1",
  "age", "married", "Swiss_citizenship", "year_uni",
  "employment_father_1", "employment_mother_1",
  "edu_father_1", "edu_father_2", "edu_father_3",
  "edu_mother_1", "edu_mother_2", "edu_mother_3",
  "degree_1", "degree_2", "degree_3",
  "division_1", "division_2", "division_3",
  "division_4", "division_6", "division_7"
)
stopifnot(length(balance_covariates) == nrow(tab_balance))

# Inverse-probability weights from the blocked design. mean() has no weight
# argument (anything passed through `...` is silently ignored), so the group
# means use weighted.mean() with the weights of the rows in that group.
ipw <- 1 / prob_obs
is_z0 <- fieldexperiment$Treatment == 0
is_z1 <- fieldexperiment$Treatment == 1

# NOTE(review): column 2 of tab_balance is headed "Mean Treatment" but is
# filled from the Treatment == 0 rows, and column 3 ("Mean Control") from the
# Treatment == 1 rows - confirm the coding of Treatment or swap the headers.
for (k in seq_along(balance_covariates)) {
  covariate <- fieldexperiment[[balance_covariates[k]]]

  tab_balance[k, 2] <- round(weighted.mean(covariate[is_z0], ipw[is_z0]), digits = 2)
  tab_balance[k, 3] <- round(weighted.mean(covariate[is_z1], ipw[is_z1]), digits = 2)

  balance_fit <- lm_robust(
    covariate ~ Treatment,
    data = data.frame(
      covariate = covariate,
      Treatment = fieldexperiment$Treatment,
      ipw = ipw
    ),
    weights = ipw
  )
  # Second row, fourth column of the coefficient table: p-value for Treatment.
  tab_balance[k, 4] <- round(summary(balance_fit)$coefficients[2, 4], digits = 2)
}

tab_balance


#Figure E1

#Balance check, treatment vs. control (field experiment): compare the
#F-statistic from regressing the observed assignment on all covariates with
#its distribution over re-drawn blocked assignments.

fieldexperiment <- subset(
  dataset_complete_analysis,
  !is.na(Treatment),
  select = c(
    Treatment, Block, Comply, Application,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_3, edu_father_2, edu_father_1,
    edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_6, division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male
  )
)

# Observed assignment and blocking variable.
Z <- fieldexperiment$Treatment
Block_var <- fieldexperiment$Block

table(dataset_complete_analysis$implicit_bias)

table(Block_var, Z)

# Units per condition within each block: one row per block, one column per
# condition.
block_m_each <- rbind(
  c(190, 377),
  c(5, 10),
  c(12, 18)
)

set.seed(1234567)

nsims <- 5000

probs <- genprobexact(Z, blockvar = Block_var)

probs

# Covariate matrix: columns 5 to 46 of the subset, i.e. everything after
# Treatment, Block, Comply and Application.
X <- as.matrix(fieldexperiment[, 5:46])

Blocked_design <- declare_ra(blocks = Block_var, block_m_each = block_m_each)
Z_obs <- factor(Z, levels = 0:1, labels = c(0, 1))
prob_obs <- obtain_condition_probabilities(Blocked_design, Z_obs)

# One F-statistic per simulated assignment.
# NOTE(review): the weights are those of the observed assignment and are
# reused unchanged for every simulated draw - confirm this is intended.
fstatstore <- rep(NA, nsims)

for (i in seq_len(nsims)) {
  Z_sim <- block_ra(blocks = Block_var, block_m_each = block_m_each)
  fstatstore[i] <- summary(lm(Z_sim ~ X, weights = 1 / prob_obs))$fstatistic[1]
}

# F-statistic for the observed assignment and the share of simulated
# statistics at least as large.
fstat <- summary(lm(Z ~ X, weights = 1 / prob_obs))$fstatistic[1]

mean(fstat <= fstatstore) #0.57

#Figure E1a
pdf("balance_fieldexperiment.pdf", width = 6, height = 5)
hist(
  fstatstore,
  breaks = 100,
  main = "Sampling distribution of the estimated f-statistic",
  xlab = "Estimated f-statistic"
)
abline(v = fstat, col = "red", lwd = 3)
dev.off()


#Attrition field experiment

fieldexperiment2 <- subset(
  dataset_complete_analysis,
  !is.na(Treatment),
  select = c(
    Treatment, Block, Comply, running_for_office_w2,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_3, edu_father_2, edu_father_1,
    edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_6, division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male
  )
)

# Attrition indicator: 1 unless running_for_office_w2 holds one of the
# values 0 to 4, in which case it is 0.
fieldexperiment2$attrition <- 1
for (answer in 0:4) {
  fieldexperiment2$attrition[fieldexperiment2$running_for_office_w2 == answer] <- 0
}

table(fieldexperiment2$attrition)

# Observed assignment, blocking variable and the single regressor.
Z <- fieldexperiment2$Treatment
Block_var <- fieldexperiment2$Block
X <- fieldexperiment2$attrition

table(Block_var, Z)

# Units per condition within each block: one row per block, one column per
# condition.
Block_m_each <- rbind(
  c(190, 377),
  c(5, 10),
  c(12, 18)
)
set.seed(1234567)

nsims <- 5000

probs <- genprobexact(Z, blockvar = Block_var)

probs

Blocked_design <- declare_ra(blocks = Block_var, block_m_each = Block_m_each)
Z_obs <- factor(Z, levels = 0:1, labels = c(0, 1))
prob_obs <- obtain_condition_probabilities(Blocked_design, Z_obs)

# One F-statistic per simulated assignment.
# NOTE(review): the weights are those of the observed assignment and are
# reused unchanged for every simulated draw - confirm this is intended.
fstatstore <- rep(NA, nsims)

for (i in seq_len(nsims)) {
  Z_sim <- block_ra(blocks = Block_var, block_m_each = Block_m_each)
  fstatstore[i] <- summary(lm(Z_sim ~ X, weights = 1 / prob_obs))$fstatistic[1]
}

# F-statistic for the observed assignment and the share of simulated
# statistics at least as large.
fstat <- summary(lm(Z ~ X, weights = 1 / prob_obs))$fstatistic[1]

mean(fstat <= fstatstore) #0.56

#Figure E1b

pdf("attrition_fieldexperiment.pdf", width = 6, height = 5)
hist(
  fstatstore,
  breaks = 100,
  main = "Sampling distribution of the estimated f-statistic",
  xlab = "Estimated f-statistic"
)
abline(v = fstat, col = "red", lwd = 3)
dev.off()


#Figure F - Power simulations

#Cohen's d=.2

dataset_complete_analysis <- readRDS("dataset_complete_analysis.rds")

# Step 1: units with a treatment assignment (wide column set).
fieldexperiment1 <- subset(
  dataset_complete_analysis,
  !is.na(Treatment),
  select = c(
    Treatment, Block, Comply, Answer_invite, running_for_office_w2,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_father_3,
    employment_mother_1, employment_mother_3,
    edu_father_4, edu_father_3, edu_father_2, edu_father_1,
    edu_mother_4, edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_5, division_6,
    division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male,
    knowledge_male_mis, knowledge_female_mis
  )
)

# Step 2: keep units with an observed wave-2 outcome and the narrower column set.
fieldexperiment1 <- subset(
  fieldexperiment1,
  !is.na(running_for_office_w2),
  select = c(
    Treatment, Block, Comply, Answer_invite, running_for_office_w2,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_3, edu_father_2, edu_father_1,
    edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_6, division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male
  )
)

# This seed is also the RNG state the power simulations further down start from.
set.seed(1234567)
Z <- fieldexperiment1$Treatment
table(Z)
Blockvar <- fieldexperiment1$Block
Y <- fieldexperiment1$running_for_office_w2

probs <- genprobexact(Z, blockvar = Blockvar)
probs

# Inverse-probability weights: 1/probs for treated units, 1/(1 - probs) otherwise.
w <- Z / probs + (1 - Z) / (1 - probs)


#Power
# Weighted regression of the wave-2 outcome on treatment and the baseline outcome.
X1 <- fieldexperiment1$running_for_office

power <- lm_robust(Y ~ Z + X1, weights = w)

summary(power)

mean(Y)
sd(Y)

#sd=1.10 of outcome

# Effect sizes in outcome units for Cohen's d of .2 and .3.
d1 <- 1.10 * .2 #d1=.22

d2 <- 1.10 * .3 #d2=.33

# Simulated power of a two-arm experiment over a grid of sample sizes.
#
# For each sample size, `sims` synthetic experiments are generated: a
# pre-treatment covariate drawn uniformly from 0:4, a control outcome equal to
# covariate_effect * covariate plus normal noise, a constant additive treatment
# effect `tau`, and independent Bernoulli(treat_prob) assignment. Each
# experiment is analysed with lm() with and without the covariate; power is the
# share of experiments whose treatment p-value is <= alpha.
#
# Arguments:
#   tau              additive treatment effect in outcome units
#   sample_sizes     numeric vector of experiment sizes
#   sims             number of simulated experiments per sample size
#   alpha            significance threshold
#   covariate_effect coefficient of the covariate in the control outcome
#   error_mean, error_sd  mean and sd of the normal noise term
#   treat_prob       probability of assignment to treatment
# Returns:
#   list(unadjusted = <power without covariate>, adjusted = <power with
#   covariate>), each the same length as sample_sizes.
simulate_power_curve <- function(tau, sample_sizes, sims = 5000, alpha = 0.05,
                                 covariate_effect = 0.68, error_mean = 0.98,
                                 error_sd = 1.10, treat_prob = 0.34) {
  unadjusted <- rep(NA_real_, length(sample_sizes))
  adjusted <- rep(NA_real_, length(sample_sizes))

  for (j in seq_along(sample_sizes)) {
    n_units <- sample_sizes[j]
    sig_unadjusted <- rep(NA, sims)
    sig_adjusted <- rep(NA, sims)

    for (i in seq_len(sims)) {
      # Draw order (covariate, noise, assignment) is fixed so that results are
      # reproducible for a given RNG state.
      pretrdep <- sample(x = 0:4, size = n_units, replace = TRUE)

      ## Control outcome as a function of covariate and error
      Y0 <- covariate_effect * pretrdep +
        rnorm(n = n_units, mean = error_mean, sd = error_sd)
      Y1 <- Y0 + tau

      Z.sim <- rbinom(n = n_units, size = 1, prob = treat_prob)
      Y.sim <- Y1 * Z.sim + Y0 * (1 - Z.sim)

      ## Analysis without and with the covariate (the latter for precision)
      fit.sim <- lm(Y.sim ~ Z.sim)
      fit.sim.cov <- lm(Y.sim ~ Z.sim + pretrdep)

      ## Row 2 of the coefficient table is Z.sim; column 4 is its p-value
      sig_unadjusted[i] <- summary(fit.sim)$coefficients[2, 4] <= alpha
      sig_adjusted[i] <- summary(fit.sim.cov)$coefficients[2, 4] <= alpha
    }

    unadjusted[j] <- mean(sig_unadjusted)
    adjusted[j] <- mean(sig_adjusted)
  }

  list(unadjusted = unadjusted, adjusted = adjusted)
}

# Write a power-curve figure to `file` and return the value of dev.off().
#   black points         = power without covariate adjustment
#   red points           = power with covariate adjustment
#   red horizontal line  = power of 0.8 (abline also draws a red line at x = 0)
#   black vertical line  = sample size 451
plot_power_curve <- function(file, sample_sizes, unadjusted, adjusted) {
  pdf(file = file, width = 7, height = 5.5)
  plot(sample_sizes, unadjusted, ylim = c(0, 1),
       xlab = "sample sizes", ylab = "simulated power")
  points(sample_sizes, adjusted, col = "red")
  abline(h = 0.8, v = 0, col = "red")
  abline(v = 451, col = "black")
  dev.off()
}

possible.ns <- seq(from = 100, to = 1000, by = 10)
alpha <- 0.05
sims <- 5000

#small effect

power_small <- simulate_power_curve(
  tau = 0.22, sample_sizes = possible.ns, sims = sims, alpha = alpha
)
powers <- power_small$unadjusted
powers.cov <- power_small$adjusted

powers
powers.cov
# NOTE(review): element 35 of the grid is N = 440, while the figure marks
# N = 451 - confirm that this is the intended grid point.
power_exp <- powers.cov[35] #0.502

#Figure F1a

plot_power_curve("power1.pdf", possible.ns, powers, powers.cov)


#medium sized effect

power_medium <- simulate_power_curve(
  tau = 0.33, sample_sizes = possible.ns, sims = sims, alpha = alpha
)
powers <- power_medium$unadjusted
powers.cov <- power_medium$adjusted

powers
powers.cov
power_exp <- powers.cov[35] #0.84

#Figure F1b
plot_power_curve("power2.pdf", possible.ns, powers, powers.cov)


#Table G1

dataset_complete_analysis <- readRDS("dataset_complete_analysis.rds")

# Skeleton of the table: rows are compliance share, unadjusted CACE,
# covariate-adjusted CACE and N; columns 3-5 are filled from the Application
# outcome in this section, columns 6-8 are left as NA here.
tab_CACE2 <- data.frame(
  c("Comply", "CACE", "CACE", "N"),
  c("", "No", "Yes", "")
)
tab_CACE2[c("CACE_behav", "cil_behav", "cih_behav",
            "CACE_att", "cil_att", "cih_att")] <- NA
colnames(tab_CACE2) <- c("Estimate", "Covariate_adjustment",
                         "CACE", "cil", "ciu", "CACE", "cil", "ciu")
dim(tab_CACE2)
tab_CACE2


# Analysis sample: all units with a treatment assignment.
fieldexperiment <- subset(
  dataset_complete_analysis,
  !is.na(Treatment),
  select = c(
    Treatment, Block, Comply, Answer_invite, Application,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_3, edu_father_2, edu_father_1,
    edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_6, division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male
  )
)

Z <- fieldexperiment$Treatment
Blockvar <- fieldexperiment$Block
Y <- fieldexperiment$Application
X <- as.matrix(fieldexperiment[, 6:47])  # columns 6-47: age ... W1_knowledge_male
D1 <- fieldexperiment$Comply
D2 <- fieldexperiment$Answer_invite

set.seed(1234567)

table(D1)
table(D2)
table(Y)
probs <- genprobexact(Z, blockvar = Blockvar)
probs

# Inverse-probability weights: 1/probs for treated units, 1/(1 - probs) otherwise.
w <- Z / probs + (1 - Z) / (1 - probs)

# Row 1: weighted row proportion in cell [2, 2] of the Z x D2 cross-tabulation.
take_up <- crosstab(Z, D2, weight = w, prop.r = TRUE)
tab_CACE2[1, 3] <- round(take_up$prop.row[2, 2], digits = 2)
tab_CACE2

#Answer_invite

# Critical value for normal-approximation 95% confidence intervals.
z_crit <- qnorm(0.975)

# Row 2: D2 instrumented by Z, no covariates, HC2 standard errors.
CACE1 <- ivreg(Y ~ D2 | Z, weights = w)
hc2_unadj <- coeftest(CACE1, vcovHC(CACE1, type = "HC2"))
hc2_unadj
cace_unadj <- hc2_unadj[2, 1]  # row 2 = D2: estimate
se_unadj <- hc2_unadj[2, 2]    # row 2 = D2: standard error
round(cace_unadj - z_crit * se_unadj, 2)
round(cace_unadj + z_crit * se_unadj, 2)
tab_CACE2[2, 3] <- round(cace_unadj, digits = 2)
tab_CACE2[2, 4] <- round(cace_unadj - z_crit * se_unadj, digits = 2)
tab_CACE2[2, 5] <- round(cace_unadj + z_crit * se_unadj, digits = 2)


# Row 3: same model with the covariate matrix X added to both stages.
CACE2 <- ivreg(Y ~ D2 + X | Z + X, weights = w)
summary(CACE2)
hc2_adj <- coeftest(CACE2, vcovHC(CACE2, type = "HC2"))
hc2_adj
cace_adj <- hc2_adj[2, 1]
se_adj <- hc2_adj[2, 2]
round(cace_adj - z_crit * se_adj, 2)
round(cace_adj + z_crit * se_adj, 2)
tab_CACE2[3, 3] <- round(cace_adj, digits = 2)
tab_CACE2[3, 4] <- round(cace_adj - z_crit * se_adj, digits = 2)
tab_CACE2[3, 5] <- round(cace_adj + z_crit * se_adj, digits = 2)

# Row 4: sum of the first two Treatment counts.
n_by_arm <- table(fieldexperiment$Treatment)
tab_CACE2[4, 3] <- n_by_arm[1] + n_by_arm[2]

tab_CACE2


# Second outcome of Table G1 (columns 6-8): running_for_office_w2.
# Step 1: units with a treatment assignment (wide column set).
fieldexperiment1 <- subset(
  dataset_complete_analysis,
  !is.na(Treatment),
  select = c(
    Treatment, Block, Comply, Answer_invite, running_for_office_w2,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_father_3,
    employment_mother_1, employment_mother_3,
    edu_father_4, edu_father_3, edu_father_2, edu_father_1,
    edu_mother_4, edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_5, division_6,
    division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male,
    knowledge_male_mis, knowledge_female_mis
  )
)

# Step 2: keep units with an observed wave-2 outcome and the narrower column set.
fieldexperiment1 <- subset(
  fieldexperiment1,
  !is.na(running_for_office_w2),
  select = c(
    Treatment, Block, Comply, Answer_invite, running_for_office_w2,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_3, edu_father_2, edu_father_1,
    edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_6, division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male
  )
)

table(dataset_complete_analysis$running_for_office)


Z <- fieldexperiment1$Treatment
Blockvar <- fieldexperiment1$Block
Y <- fieldexperiment1$running_for_office_w2
X <- as.matrix(fieldexperiment1[, 6:47])  # columns 6-47: age ... W1_knowledge_male
D1 <- fieldexperiment1$Comply
D2 <- fieldexperiment1$Answer_invite


set.seed(1234567)

table(Z)
table(D1)

probs <- genprobexact(Z, blockvar = Blockvar)
probs

# Inverse-probability weights: 1/probs for treated units, 1/(1 - probs) otherwise.
w <- Z / probs + (1 - Z) / (1 - probs)

# Row 1: weighted row proportion in cell [2, 2] of the Z x D2 cross-tabulation.
take_up <- crosstab(Z, D2, weight = w, prop.r = TRUE)
tab_CACE2[1, 6] <- round(take_up$prop.row[2, 2], digits = 2)
tab_CACE2

# Critical value for normal-approximation 95% confidence intervals.
z_crit <- qnorm(0.975)

# Row 2: D2 instrumented by Z, no covariates, HC2 standard errors.
CACE1 <- ivreg(Y ~ D2 | Z, weights = w)
hc2_unadj <- coeftest(CACE1, vcovHC(CACE1, type = "HC2"))
hc2_unadj
cace_unadj <- hc2_unadj[2, 1]  # row 2 = D2: estimate
se_unadj <- hc2_unadj[2, 2]    # row 2 = D2: standard error
round(cace_unadj - z_crit * se_unadj, 2)
round(cace_unadj + z_crit * se_unadj, 2)
tab_CACE2[2, 6] <- round(cace_unadj, digits = 2)
tab_CACE2[2, 7] <- round(cace_unadj - z_crit * se_unadj, digits = 2)
tab_CACE2[2, 8] <- round(cace_unadj + z_crit * se_unadj, digits = 2)


# Row 3: same model with the covariate matrix X added to both stages.
CACE2 <- ivreg(Y ~ D2 + X | Z + X, weights = w)
summary(CACE2)
hc2_adj <- coeftest(CACE2, vcovHC(CACE2, type = "HC2"))
hc2_adj
cace_adj <- hc2_adj[2, 1]
se_adj <- hc2_adj[2, 2]
round(cace_adj - z_crit * se_adj, 2)
round(cace_adj + z_crit * se_adj, 2)
tab_CACE2[3, 6] <- round(cace_adj, digits = 2)
tab_CACE2[3, 7] <- round(cace_adj - z_crit * se_adj, digits = 2)
tab_CACE2[3, 8] <- round(cace_adj + z_crit * se_adj, digits = 2)

# Row 4: sum of the first two Treatment counts.
n_by_arm <- table(fieldexperiment1$Treatment)
tab_CACE2[4, 6] <- n_by_arm[1] + n_by_arm[2]

#Table G1

tab_CACE2

#Table G2

dataset_complete_analysis <- readRDS("dataset_complete_analysis.rds")

# Skeleton of the table: rows are compliance share, unadjusted CACE,
# covariate-adjusted CACE and N; columns 3-5, 6-8 and 9-11 each hold one
# outcome (estimate, lower and upper confidence bound).
tab_media2 <- data.frame(
  c("Comply", "CACE", "CACE", "N"),
  c("", "No", "Yes", "")
)
tab_media2[c("ITT_media", "cil_media", "cih_media",
             "ITT_online", "cil_online", "cih_online",
             "ITT_knowledge", "cil_knowledge", "cih_knowledge")] <- NA
colnames(tab_media2) <- c("Estimate", "Covariate_adjustment",
                          "CACE", "cil", "ciu", "CACE", "cil", "ciu",
                          "CACE", "cil", "ciu")


# Outcome 1 (columns 3-5): campaign_media_w2.
# Step 1: units with a treatment assignment (wide column set).
fieldexperiment3 <- subset(
  dataset_complete_analysis,
  !is.na(Treatment),
  select = c(
    Treatment, Block, Comply, Answer_invite, campaign_media_w2,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_4, edu_father_3, edu_father_2, edu_father_1,
    edu_mother_4, edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_5, division_6,
    division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male,
    knowledge_male_mis, knowledge_female_mis
  )
)
fieldexperiment3
# Step 2: keep units with an observed outcome and the narrower column set.
fieldexperiment3 <- subset(
  fieldexperiment3,
  !is.na(campaign_media_w2),
  select = c(
    Treatment, Block, Comply, Answer_invite, campaign_media_w2,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_3, edu_father_2, edu_father_1,
    edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_6, division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male
  )
)
dim(fieldexperiment3)

Z <- fieldexperiment3$Treatment
Blockvar <- fieldexperiment3$Block
Y <- fieldexperiment3$campaign_media_w2
X <- as.matrix(fieldexperiment3[, 6:47])  # columns 6-47: age ... W1_knowledge_male
D1 <- fieldexperiment3$Comply
D2 <- fieldexperiment3$Answer_invite

dim(fieldexperiment3)

table(Z)
table(D2)

table(Y)

probs <- genprobexact(Z, blockvar = Blockvar)
probs

# Inverse-probability weights: 1/probs for treated units, 1/(1 - probs) otherwise.
w <- Z / probs + (1 - Z) / (1 - probs)

# Row 1: weighted row proportion in cell [2, 2] of the Z x D2 cross-tabulation.
take_up <- crosstab(Z, D2, weight = w, prop.r = TRUE)
tab_media2[1, 3] <- round(take_up$prop.row[2, 2], digits = 2)


# CACE

# Critical value for normal-approximation 95% confidence intervals.
z_crit <- qnorm(0.975)

# Row 2: D2 instrumented by Z, no covariates, HC2 standard errors.
CACE1 <- ivreg(Y ~ D2 | Z, weights = w)
hc2_unadj <- coeftest(CACE1, vcovHC(CACE1, type = "HC2"))
hc2_unadj
cace_unadj <- hc2_unadj[2, 1]  # row 2 = D2: estimate
se_unadj <- hc2_unadj[2, 2]    # row 2 = D2: standard error
round(cace_unadj - z_crit * se_unadj, 2)
round(cace_unadj + z_crit * se_unadj, 2)
tab_media2[2, 3] <- round(cace_unadj, digits = 2)
tab_media2[2, 4] <- round(cace_unadj - z_crit * se_unadj, digits = 2)
tab_media2[2, 5] <- round(cace_unadj + z_crit * se_unadj, digits = 2)


# Row 3: same model with the covariate matrix X added to both stages.
# The covariance matrix is taken from CACE2 itself; the printed diagnostics
# previously paired CACE2 with the covariance matrix of CACE1.
CACE2 <- ivreg(Y ~ D2 + X | Z + X, weights = w)
summary(CACE2)
hc2_adj <- coeftest(CACE2, vcovHC(CACE2, type = "HC2"))
hc2_adj
cace_adj <- hc2_adj[2, 1]
se_adj <- hc2_adj[2, 2]
round(cace_adj - z_crit * se_adj, 2)
round(cace_adj + z_crit * se_adj, 2)
tab_media2[3, 3] <- round(cace_adj, digits = 2)
tab_media2[3, 4] <- round(cace_adj - z_crit * se_adj, digits = 2)
tab_media2[3, 5] <- round(cace_adj + z_crit * se_adj, digits = 2)

# Row 4: sum of the first two Treatment counts.
n_by_arm <- table(fieldexperiment3$Treatment)
tab_media2[4, 3] <- round(n_by_arm[1] + n_by_arm[2], digits = 0)


#follow campaign online

# Outcome 2 of Table G2 (columns 6-8): campaign_online_w2.
# Step 1: units with a treatment assignment (wide column set).
fieldexperiment4 <- subset(
  dataset_complete_analysis,
  !is.na(Treatment),
  select = c(
    Treatment, Block, Comply, Answer_invite, campaign_online_w2,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_4, edu_father_3, edu_father_2, edu_father_1,
    edu_mother_4, edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_5, division_6,
    division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male,
    knowledge_male_mis, knowledge_female_mis
  )
)
fieldexperiment4
# Step 2: keep units with an observed outcome and the narrower column set.
# NOTE(review): unlike the other outcome samples in this script, this column
# list has no degree_3 - confirm that the omission is intended.
fieldexperiment4 <- subset(
  fieldexperiment4,
  !is.na(campaign_online_w2),
  select = c(
    Treatment, Block, Comply, Answer_invite, campaign_online_w2,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_3, edu_father_2, edu_father_1,
    edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_6, division_7,
    degree_1, degree_2,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male
  )
)
dim(fieldexperiment4)

Z <- fieldexperiment4$Treatment
Blockvar <- fieldexperiment4$Block
Y <- fieldexperiment4$campaign_online_w2
# Columns 6-45 run from age to W1_knowledge_female.
# NOTE(review): W1_knowledge_male (column 46) is not part of X - confirm.
X <- as.matrix(fieldexperiment4[, 6:45])
D1 <- fieldexperiment4$Comply
D2 <- fieldexperiment4$Answer_invite

dim(fieldexperiment4)
table(Y)
table(Z)
table(Z, D1)

probs <- genprobexact(Z, blockvar = Blockvar)
probs

# Inverse-probability weights: 1/probs for treated units, 1/(1 - probs) otherwise.
w <- Z / probs + (1 - Z) / (1 - probs)

# Row 1: weighted row proportion in cell [2, 2] of the Z x D2 cross-tabulation.
take_up <- crosstab(Z, D2, weight = w, prop.r = TRUE)
tab_media2[1, 6] <- round(take_up$prop.row[2, 2], digits = 2)


# CACE

# Critical value for normal-approximation 95% confidence intervals.
z_crit <- qnorm(0.975)

# Row 2: D2 instrumented by Z, no covariates, HC2 standard errors.
CACE1 <- ivreg(Y ~ D2 | Z, weights = w)
hc2_unadj <- coeftest(CACE1, vcovHC(CACE1, type = "HC2"))
hc2_unadj
cace_unadj <- hc2_unadj[2, 1]  # row 2 = D2: estimate
se_unadj <- hc2_unadj[2, 2]    # row 2 = D2: standard error
round(cace_unadj - z_crit * se_unadj, 2)
round(cace_unadj + z_crit * se_unadj, 2)
tab_media2[2, 6] <- round(cace_unadj, digits = 2)
tab_media2[2, 7] <- round(cace_unadj - z_crit * se_unadj, digits = 2)
tab_media2[2, 8] <- round(cace_unadj + z_crit * se_unadj, digits = 2)


# Row 3: same model with the covariate matrix X added to both stages.
# Estimate and standard error both come from CACE2 with its own covariance
# matrix; the printed bounds previously took the standard error from a call
# that paired CACE1 with the covariance matrix of CACE2.
CACE2 <- ivreg(Y ~ D2 + X | Z + X, weights = w)
hc2_adj <- coeftest(CACE2, vcovHC(CACE2, type = "HC2"))
hc2_adj
cace_adj <- hc2_adj[2, 1]
se_adj <- hc2_adj[2, 2]
round(cace_adj - z_crit * se_adj, 2)
round(cace_adj + z_crit * se_adj, 2)
tab_media2[3, 6] <- round(cace_adj, digits = 2)
tab_media2[3, 7] <- round(cace_adj - z_crit * se_adj, digits = 2)
tab_media2[3, 8] <- round(cace_adj + z_crit * se_adj, digits = 2)

# Row 4: sum of the first two Treatment counts.
n_by_arm <- table(fieldexperiment4$Treatment)
tab_media2[4, 6] <- round(n_by_arm[1] + n_by_arm[2], digits = 0)


#Knowledge

# Outcome 3 of Table G2 (columns 9-11): W2_knowledge.
# Step 1: units with a treatment assignment (wide column set).
fieldexperiment_knowledge <- subset(
  dataset_complete_analysis,
  !is.na(Treatment),
  select = c(
    Treatment, Block, Comply, Answer_invite, W2_knowledge,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_4, edu_father_3, edu_father_2, edu_father_1,
    edu_mother_4, edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_5, division_6,
    division_7,
    degree_1, degree_2, degree_3,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male,
    knowledge_male_mis, knowledge_female_mis
  )
)

# Step 2: keep units with an observed outcome and the narrower column set.
# NOTE(review): this column list has no degree_3 - confirm that the omission
# is intended.
fieldexperiment_knowledge <- subset(
  fieldexperiment_knowledge,
  !is.na(W2_knowledge),
  select = c(
    Treatment, Block, Comply, Answer_invite, W2_knowledge,
    age, married, Swiss_citizenship, year_uni,
    employment_father_1, employment_mother_1,
    edu_father_3, edu_father_2, edu_father_1,
    edu_mother_3, edu_mother_2, edu_mother_1,
    division_1, division_2, division_3, division_4, division_6, division_7,
    degree_1, degree_2,
    future_per, career_goals_1, career_entry_1, children_1, children_2,
    social_skills, presentation_skills, networking_skills,
    running_for_office, politicians,
    gender_childcare, gender_career, gender_election, gender_work,
    gender_politics, gender_children,
    econ_interest, pol_interest, pol_knowledge_self_report,
    W1_knowledge_female, W1_knowledge_male
  )
)

Z <- fieldexperiment_knowledge$Treatment
Blockvar <- fieldexperiment_knowledge$Block
Y <- fieldexperiment_knowledge$W2_knowledge
# Columns 6-36 run from age to gender_childcare.
# NOTE(review): the remaining covariates (gender_career ... W1_knowledge_male)
# are not part of X - confirm that this is intended.
X <- as.matrix(fieldexperiment_knowledge[, 6:36])
D1 <- fieldexperiment_knowledge$Comply
D2 <- fieldexperiment_knowledge$Answer_invite

table(Y)
table(Z)

probs <- genprobexact(Z, blockvar = Blockvar)
probs

# Inverse-probability weights: 1/probs for treated units, 1/(1 - probs) otherwise.
w <- Z / probs + (1 - Z) / (1 - probs)

# Row 1: weighted row proportion in cell [2, 2] of the Z x D2 cross-tabulation.
take_up <- crosstab(Z, D2, weight = w, prop.r = TRUE)
tab_media2[1, 9] <- round(take_up$prop.row[2, 2], digits = 2)


# CACE

# Critical value for normal-approximation 95% confidence intervals.
z_crit <- qnorm(0.975)

## CACE1 ##

# Row 2: D2 instrumented by Z, no covariates, HC2 standard errors.
CACE1 <- ivreg(Y ~ D2 | Z, weights = w)
hc2_unadj <- coeftest(CACE1, vcovHC(CACE1, type = "HC2"))
hc2_unadj
cace_unadj <- hc2_unadj[2, 1]  # row 2 = D2: estimate
se_unadj <- hc2_unadj[2, 2]    # row 2 = D2: standard error
round(cace_unadj - z_crit * se_unadj, 2)
round(cace_unadj + z_crit * se_unadj, 2)
tab_media2[2, 9] <- round(cace_unadj, digits = 2)
tab_media2[2, 10] <- round(cace_unadj - z_crit * se_unadj, digits = 2)
tab_media2[2, 11] <- round(cace_unadj + z_crit * se_unadj, digits = 2)


# Row 3: same model with the covariate matrix X added to both stages.
CACE2 <- ivreg(Y ~ D2 + X | Z + X, weights = w)
hc2_adj <- coeftest(CACE2, vcovHC(CACE2, type = "HC2"))
hc2_adj
cace_adj <- hc2_adj[2, 1]
se_adj <- hc2_adj[2, 2]
round(cace_adj - z_crit * se_adj, 2)
round(cace_adj + z_crit * se_adj, 2)
tab_media2[3, 9] <- round(cace_adj, digits = 2)
tab_media2[3, 10] <- round(cace_adj - z_crit * se_adj, digits = 2)
tab_media2[3, 11] <- round(cace_adj + z_crit * se_adj, digits = 2)


# Row 4: sum of the first two Treatment counts.
n_by_arm <- table(fieldexperiment_knowledge$Treatment)
tab_media2[4, 9] <- round(n_by_arm[1] + n_by_arm[2], digits = 0)

#Table G2
tab_media2


#Compile log file

# Print the appendix tables into log_appendix.txt. capture.output() diverts
# output only for the duration of the call and restores it (closing the file)
# even when one of the print() calls fails, so a missing table cannot leave
# console output redirected into the log file.
capture.output(
  #Table D1
  print(Tab_D1),

  #Table E1
  print(tab_balance),

  #Table G1
  print(tab_CACE2),

  #Table G2
  print(tab_media2),

  file = "log_appendix.txt"
)
